##########################################################
# John Henderson
# Replication Data for: "Hookworm Eradication as a Natural 
#   Experiment for Schooling and Voting in the American 
#   South", Forthcoming in Political Behavior, May 20, 2017
# 
##########################################################
#
#  table_1_3_campaign_14to17.R
#  -- file produces the main first stage and IV results for 
#	  Table 1 & 3 before and after matching
#     for the binary RSC Campaign intervention 
#		:: ages 14 to 17 
#
##########################################################

rm(list=ls())

library(Matching)
library(AER)                           
library(foreign)

setwd('~/Dropbox/hookworm/replication')
source('funs/lower.bound.R')
            
load('data/hookwormData.Rdata') 

##########################################################  
# data required 
#  - mMat :: matching matrix used in matching and balancing
#  - oMat :: outcome matrix of county education and turnout rates
#  - zMat :: intervention matrix 
#  - pMat :: placebo matrix
# hookSouth :: combines all objects into one dataset, w/ additional covariates  
##########################################################

##########################################################
##########################################################
# BEFORE MATCHING
##########################################################
##########################################################

d1=oMat$school1920_14to17
d0=oMat$school1910_14to17    

y0=rowMeans(cbind(oMat$vote1916,oMat$vote1920))
y1=rowMeans(cbind(oMat$vote1928,oMat$vote1932))

tr=zMat$z_campaign

d=d1-d0
y=y1-y0
d_dd=c(d0,d1)
y_dd=c(y0,y1)
tr_dd=c(tr,tr) 

xmat=mMat[,-c(1:3)][,1:19]
xmat=as.matrix(rbind(xmat,xmat))

year=c(rep(0,length(tr)),rep(1,length(tr)))
temps=as.data.frame(cbind(d_dd,y_dd,tr_dd,year))   
 
### first stage 
f0=summary(ivreg(d_dd~year*tr_dd+xmat))
f0$coef[nrow(f0$coef),]
#    Estimate  Std. Error     t value    Pr(>|t|) 
# 0.020777759 0.007309466 2.842582520 0.004510323

### itt
r0=summary(ivreg(y_dd~year*tr_dd+xmat))
r0$coef[nrow(r0$coef),]
#     Estimate   Std. Error      t value     Pr(>|t|) 
# 0.0261796119 0.0077856412 3.3625505236 0.0007835832 
    
### iv estimate
m0=summary(ivreg(y_dd~d_dd+year+tr_dd+xmat | xmat+tr_dd*year))    
m0$coef[2,]
#   Estimate Std. Error    t value   Pr(>|t|) 
# 1.25998245 0.56500258 2.23004726 0.02583046

# set up data for F test
                                      
tr_dd_year=tr_dd*year  
campaign=as.data.frame(cbind(y_dd,d_dd,tr_dd,tr_dd_year,year,xmat))
write.dta(campaign,'fstat/campaignBEFORE_14to17.dta')        
                                                                                             

##########################################################
##########################################################
# AFTER MATCHING
##########################################################
##########################################################

mMat=bMat=mMat[,-c(1:3)][,1:19]

# pre-sets for 'lower.bound' fitfunc :: lower.bound only improves if total balance improves
floors=NULL					# :: allows to preset values at the floor overall
floor.values=NULL           # :: allows to preset values at the floor for each covariate
reweighted = -1 			# :: fits over a single summary measure of balance
exacts=array(0,ncol(mMat))  # :: exact matching for covariates 
exacts[1]=0    

starts=c(0.8639389,12.5338719,14.1059201,232.6227015,13.3530437,0.8680492,809.5765534,2.3393068,1.8030039,0.9325587,44.5785061,1.5006086,65.5576631,2.8099156,9.4050025,43.4617957,453.9672492,357.1535636,388.9175985)     


set.seed(1005)
genout=GenMatch(starting.values=starts,exact=exacts,MemoryMatrix=T,fit.func=lower.bound,Tr=tr,X=mMat,BalanceMatrix=bMat,M=1,pop.size=2,max.generations=5,wait.generations=3,hard.generation.limit=T,estimand="ATT",ties=T)

mout=(Match(Tr=tr,X=mMat,estimand="ATT",M=1,Weight.matrix=genout,exact=exacts,ties=T))

dtr=c(d0[mout$index.treated],d1[mout$index.treated])
dct=c(d0[mout$index.control],d1[mout$index.control])
ytr=c(y0[mout$index.treated],y1[mout$index.treated])
yct=c(y0[mout$index.control],y1[mout$index.control])
trt=c(tr[mout$index.treated],tr[mout$index.treated])
tct=c(tr[mout$index.control],tr[mout$index.control])

year_tr=c(year[1:length(d0)][mout$index.treated],year[(1+length(d1)):length(year)][mout$index.treated])
year_ct=c(year[1:length(d0)][mout$index.control],year[(1+length(d1)):length(year)][mout$index.control])
w=c(mout$weights,mout$weights) 

xmat=as.matrix(bMat[c(mout$index.treated,mout$index.treated,mout$index.control,mout$index.control),])

dmat=c(dtr,dct)
ymat=c(ytr,yct)
zmat=c(trt,tct)

yearmat=c(year_tr,year_ct)
wmat=c(w,w)
                                                                                                                   
### first stage 
f1=summary(ivreg(dmat~yearmat*zmat+xmat,weights=wmat))             
f1$coef[nrow(f1$coef),]
#     Estimate   Std. Error      t value     Pr(>|t|) 
# 1.581096e-02 3.382957e-03 4.673712e+00 3.003367e-06

### itt
r1=summary(ivreg(ymat~yearmat*zmat+xmat,weights=wmat))                                                          
r1$coef[nrow(r1$coef),]
#     Estimate   Std. Error      t value     Pr(>|t|) 
# 0.0135764933 0.0036032494 3.7678472353 0.0001657697 

### iv estimate
m1=summary(ivreg(ymat~dmat+yearmat+zmat+xmat | xmat+zmat*yearmat,weights=wmat))    
m1$coef[2,]
#   Estimate Std. Error    t value   Pr(>|t|) 
# 0.85867589 0.29134528 2.94727920 0.00321451
 
y_dd=ymat
d_dd=dmat
year=yearmat
tr_dd=zmat
xmat=xmat 
tr_dd_year=zmat*yearmat
campaign=as.data.frame(cbind(y_dd,d_dd,tr_dd,tr_dd_year,year,xmat,wmat))
write.dta(campaign,'fstat/campaignAFTER_14to17.dta')   

system("rm fstat/table_3_iv_fstats_campaign_14to17.log")                           
system("/Applications/Stata/StataSE.app/Contents/MacOS/stata-se -b do fstat/table_3_iv_fstats_campaign_14to17.do &")
system("mv table_3_iv_fstats_campaign_14to17.log fstat/") 

#END